{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Hello world\n",
    "\n",
    "this is my first _py_notebook_."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n"
     ]
    }
   ],
   "source": [
    "print [i for i in xrange(0, 10)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from pandas import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "path = \"usagov_bitly_data2012-03-16-1331923249.txt\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "records = [json.loads(line) for line in open(path)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "?json.load"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{u'a': u'GoogleMaps/RochesterNY',\n",
       " u'c': u'US',\n",
       " u'cy': u'Provo',\n",
       " u'g': u'mwszkS',\n",
       " u'gr': u'UT',\n",
       " u'h': u'mwszkS',\n",
       " u'hc': 1308262393,\n",
       " u'hh': u'j.mp',\n",
       " u'l': u'bitly',\n",
       " u'll': [40.218102, -111.613297],\n",
       " u'nk': 0,\n",
       " u'r': u'http://www.AwareMap.com/',\n",
       " u't': 1331923249,\n",
       " u'tz': u'America/Denver',\n",
       " u'u': u'http://www.monroecounty.gov/etc/911/rss.php'}"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "records[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "u'GoogleMaps/RochesterNY'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "records[1][\"a\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{u'a': u'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.78 Safari/535.11',\n",
       "  u'al': u'en-US,en;q=0.8',\n",
       "  u'c': u'US',\n",
       "  u'cy': u'Danvers',\n",
       "  u'g': u'A6qOVH',\n",
       "  u'gr': u'MA',\n",
       "  u'h': u'wfLQtf',\n",
       "  u'hc': 1331822918,\n",
       "  u'hh': u'1.usa.gov',\n",
       "  u'l': u'orofrog',\n",
       "  u'll': [42.576698, -70.954903],\n",
       "  u'nk': 1,\n",
       "  u'r': u'http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/wfLQtf',\n",
       "  u't': 1331923247,\n",
       "  u'tz': u'America/New_York',\n",
       "  u'u': u'http://www.ncbi.nlm.nih.gov/pubmed/22415991'}]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "records[:1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "data = DataFrame(records)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_heartbeat_</th>\n",
       "      <th>a</th>\n",
       "      <th>al</th>\n",
       "      <th>c</th>\n",
       "      <th>cy</th>\n",
       "      <th>g</th>\n",
       "      <th>gr</th>\n",
       "      <th>h</th>\n",
       "      <th>hc</th>\n",
       "      <th>hh</th>\n",
       "      <th>kw</th>\n",
       "      <th>l</th>\n",
       "      <th>ll</th>\n",
       "      <th>nk</th>\n",
       "      <th>r</th>\n",
       "      <th>t</th>\n",
       "      <th>tz</th>\n",
       "      <th>u</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...</td>\n",
       "      <td>en-US,en;q=0.8</td>\n",
       "      <td>US</td>\n",
       "      <td>Danvers</td>\n",
       "      <td>A6qOVH</td>\n",
       "      <td>MA</td>\n",
       "      <td>wfLQtf</td>\n",
       "      <td>1331822918</td>\n",
       "      <td>1.usa.gov</td>\n",
       "      <td>NaN</td>\n",
       "      <td>orofrog</td>\n",
       "      <td>[42.576698, -70.954903]</td>\n",
       "      <td>1</td>\n",
       "      <td>http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/...</td>\n",
       "      <td>1331923247</td>\n",
       "      <td>America/New_York</td>\n",
       "      <td>http://www.ncbi.nlm.nih.gov/pubmed/22415991</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>GoogleMaps/RochesterNY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>US</td>\n",
       "      <td>Provo</td>\n",
       "      <td>mwszkS</td>\n",
       "      <td>UT</td>\n",
       "      <td>mwszkS</td>\n",
       "      <td>1308262393</td>\n",
       "      <td>j.mp</td>\n",
       "      <td>NaN</td>\n",
       "      <td>bitly</td>\n",
       "      <td>[40.218102, -111.613297]</td>\n",
       "      <td>0</td>\n",
       "      <td>http://www.AwareMap.com/</td>\n",
       "      <td>1331923249</td>\n",
       "      <td>America/Denver</td>\n",
       "      <td>http://www.monroecounty.gov/etc/911/rss.php</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ...</td>\n",
       "      <td>en-US</td>\n",
       "      <td>US</td>\n",
       "      <td>Washington</td>\n",
       "      <td>xxr3Qb</td>\n",
       "      <td>DC</td>\n",
       "      <td>xxr3Qb</td>\n",
       "      <td>1331919941</td>\n",
       "      <td>1.usa.gov</td>\n",
       "      <td>NaN</td>\n",
       "      <td>bitly</td>\n",
       "      <td>[38.9007, -77.043098]</td>\n",
       "      <td>1</td>\n",
       "      <td>http://t.co/03elZC4Q</td>\n",
       "      <td>1331923250</td>\n",
       "      <td>America/New_York</td>\n",
       "      <td>http://boxer.senate.gov/en/press/releases/0316...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)...</td>\n",
       "      <td>pt-br</td>\n",
       "      <td>BR</td>\n",
       "      <td>Braz</td>\n",
       "      <td>zCaLwp</td>\n",
       "      <td>27</td>\n",
       "      <td>zUtuOu</td>\n",
       "      <td>1331923068</td>\n",
       "      <td>1.usa.gov</td>\n",
       "      <td>NaN</td>\n",
       "      <td>alelex88</td>\n",
       "      <td>[-23.549999, -46.616699]</td>\n",
       "      <td>0</td>\n",
       "      <td>direct</td>\n",
       "      <td>1331923249</td>\n",
       "      <td>America/Sao_Paulo</td>\n",
       "      <td>http://apod.nasa.gov/apod/ap120312.html</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...</td>\n",
       "      <td>en-US,en;q=0.8</td>\n",
       "      <td>US</td>\n",
       "      <td>Shrewsbury</td>\n",
       "      <td>9b6kNl</td>\n",
       "      <td>MA</td>\n",
       "      <td>9b6kNl</td>\n",
       "      <td>1273672411</td>\n",
       "      <td>bit.ly</td>\n",
       "      <td>NaN</td>\n",
       "      <td>bitly</td>\n",
       "      <td>[42.286499, -71.714699]</td>\n",
       "      <td>0</td>\n",
       "      <td>http://www.shrewsbury-ma.gov/selco/</td>\n",
       "      <td>1331923251</td>\n",
       "      <td>America/New_York</td>\n",
       "      <td>http://www.shrewsbury-ma.gov/egov/gallery/1341...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   _heartbeat_                                                  a  \\\n",
       "0          NaN  Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...   \n",
       "1          NaN                             GoogleMaps/RochesterNY   \n",
       "2          NaN  Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ...   \n",
       "3          NaN  Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)...   \n",
       "4          NaN  Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...   \n",
       "\n",
       "               al   c          cy       g  gr       h          hc         hh  \\\n",
       "0  en-US,en;q=0.8  US     Danvers  A6qOVH  MA  wfLQtf  1331822918  1.usa.gov   \n",
       "1             NaN  US       Provo  mwszkS  UT  mwszkS  1308262393       j.mp   \n",
       "2           en-US  US  Washington  xxr3Qb  DC  xxr3Qb  1331919941  1.usa.gov   \n",
       "3           pt-br  BR        Braz  zCaLwp  27  zUtuOu  1331923068  1.usa.gov   \n",
       "4  en-US,en;q=0.8  US  Shrewsbury  9b6kNl  MA  9b6kNl  1273672411     bit.ly   \n",
       "\n",
       "    kw         l                        ll  nk  \\\n",
       "0  NaN   orofrog   [42.576698, -70.954903]   1   \n",
       "1  NaN     bitly  [40.218102, -111.613297]   0   \n",
       "2  NaN     bitly     [38.9007, -77.043098]   1   \n",
       "3  NaN  alelex88  [-23.549999, -46.616699]   0   \n",
       "4  NaN     bitly   [42.286499, -71.714699]   0   \n",
       "\n",
       "                                                   r           t  \\\n",
       "0  http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/...  1331923247   \n",
       "1                           http://www.AwareMap.com/  1331923249   \n",
       "2                               http://t.co/03elZC4Q  1331923250   \n",
       "3                                             direct  1331923249   \n",
       "4                http://www.shrewsbury-ma.gov/selco/  1331923251   \n",
       "\n",
       "                  tz                                                  u  \n",
       "0   America/New_York        http://www.ncbi.nlm.nih.gov/pubmed/22415991  \n",
       "1     America/Denver        http://www.monroecounty.gov/etc/911/rss.php  \n",
       "2   America/New_York  http://boxer.senate.gov/en/press/releases/0316...  \n",
       "3  America/Sao_Paulo            http://apod.nasa.gov/apod/ap120312.html  \n",
       "4   America/New_York  http://www.shrewsbury-ma.gov/egov/gallery/1341...  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
